/*
This code attempts to estimate treatment effects of ppp
*/

// Load project globals. This script relies on ${dodir} being set before it
// runs, and uses ${outdir} and ${datadir} below (presumably defined by
// make_globals.do -- confirm).
do "${dodir}/make_globals.do"

// Install (or refresh) the community-contributed DiD packages from SSC.
// NOTE(review): this needs network access on every run.
ssc install drdid, all replace
ssc install csdid, all replace

// Close any log left open by an earlier run, then start a fresh one.
// The path is quoted so that an ${outdir} containing spaces still works.
capture log close
log using "${outdir}/f941_regs_1.log", replace

////////////////////////////////////////////////////////////////////////////////
// step 1: organize data
////////////////////////////////////////////////////////////////////////////////

use "${datadir}/analysis_data_2", clear

// q: consecutive integer index (1, 2, ...) over the distinct values of quarter
egen q=group(quarter)

// FOR NOW: drop if 2022 q1 or later, as data are incomplete
// (rows with missing q are dropped too, since missing sorts above any number)
local n_quarters 16
drop if q>`n_quarters'

* fill in missing observations: build a full id x q grid and merge it back in
preserve

	// one row per id, carrying the last nonmissing application quarters
	collapse (lastnm) app_quarter_first app_quarter_second, by(id)
	
	duplicates drop

	// one copy of each id per quarter
	expand `n_quarters'

	// number the copies 1..n_quarters within id; after the collapse the only
	// identifier left in memory is id, so that is what we must sort on
	bysort id: gen q = _n

	save "${datadir}/temp_matrix", replace
	
restore

preserve 

	// lookup table: q -> last nonmissing quarter value
	collapse (lastnm) quarter, by(q)
	
	save "${datadir}/temp_q", replace
	
restore

// add the id x q rows that are absent from the data, then fill in quarter
// for them from the lookup table
merge 1:1 id q using "${datadir}/temp_matrix", update replace nogen
merge m:1 q using "${datadir}/temp_q", update replace nogen

// missing outcomes (including those on the rows just added) are set to zero
replace total_compensation=0 if total_compensation==.
replace num_employees=0 if num_employees==.

// declare the panel: id is the unit, q is the time index
tsset id q

// Notation follows Sant'Anna et al.; see the slide deck at
// stata.com/meeting/us21/slides/us21_santanna.pdf

// d = 1 in the period whose quarter equals either application quarter
generate d = (quarter == app_quarter_first) | (quarter == app_quarter_second)

// c = 1 for units that never have d == 1
egen c = max(d), by(id)
replace c = 1 - c

// g = first period with d == 1 ("treatment starting-time"); 0 if there is none
tempvar treat_q
generate `treat_q' = q if d == 1
egen g = min(`treat_q'), by(id)
replace g = 0 if missing(g)
drop `treat_q'
tabulate g

// treated = 1 from the first d == 1 period onward
// (replace runs in panel order, so the lag picks up values already switched on)
generate treated = d
replace treated = 1 if L.treated == 1

// treat_wave: 0 = never, 1 = unit has a start period,
// 2 = a d == 1 period that falls after the start period
generate treat_wave = cond(d == 1 & q > g, 2, g > 0 & !missing(g))

// FOR NOW: flag if receive second wave (unit-level maximum of treat_wave)
egen mt = max(treat_wave), by(id)

// numeric state code, and each unit's modal state (largest value on ties)
egen st = group(firm_state)
egen st_mode = mode(st), by(id) maxmode

// minimum employment per unit within three windows of q:
// q 1-4 ("2018"), q 5-8 ("2019"), q 9 ("2020q1")
local win_2018   "q<=4"
local win_2019   "inrange(q, 5, 8)"
local win_2020q1 "q==9"
foreach w in 2018 2019 2020q1 {
	tempvar emp_in_window
	generate `emp_in_window' = num_employees if `win_`w''
	egen min_emp_`w' = min(`emp_in_window'), by(id)
	drop `emp_in_window'
}

// each unit's modal 2- and 4-digit NAICS code (largest value on ties)
foreach digits in 2 4 {
	egen naics_`digits'_mode = mode(naics_`digits'), by(id) maxmode
}

// fold start period 12 into 11
// NOTE(review): the reason for merging these two cohorts is not stated here
replace g = 11 if g == 12

// price-index level by year, used to express dollar amounts in 2020 dollars
// NOTE(review): the source series for these values is not recorded here
local cpic2018 = 141.8
local cpic2019 = 143.9
local cpic2020 = 145.4
local cpic2021 = 152.1
local cpic2022 = 163.7

// For each dollar variable: real = nominal * index(2020) / index(tax_yr).
// The nominal series is kept as <var>_nom; the real series takes the
// original name.
local dolladollabill med_wage total_compensation
foreach var of varlist `dolladollabill'{
	gen `var'20=.
	foreach yr of numlist 2018/2022{
		replace `var'20= `var' * `cpic2020' / `cpic`yr'' if tax_yr==`yr'
	}
	// A nominal zero is zero in any year's dollars. Without this line, rows
	// whose tax_yr is missing or outside 2018-2022 would turn a zero
	// (including zeros filled in for missing outcomes) back into missing.
	replace `var'20=0 if `var'==0 & missing(`var'20)
	rename `var' `var'_nom
	rename `var'20 `var'
}

save "${datadir}/reg_data", replace
